#!/bin/bash

# Settings shared with the helper scripts started below (all are exported).
# CUR_DATA_DIR is derived from BASE_DATA_PATH and BASE_DATE; LC_ALL=C puts
# every child process (sort, awk, ...) in the C locale.
BASE_DATA_PATH=/home/mixagol/data
BASE_DATE=20120802
CUR_DATA_DIR="${BASE_DATA_PATH}/2_raw_matrix_find/${BASE_DATE}/wu_blast"
LC_ALL=C
RESULTS_DIR=/home/mixagol/work/data/results_wu_blast/
export BASE_DATA_PATH BASE_DATE CUR_DATA_DIR LC_ALL RESULTS_DIR

# Make sure the output directory under RESULTS_DIR exists before anything runs.
mkdir -p "${RESULTS_DIR}/output"

# Stage fresh copies of the genes_good_faa and wu_blast_db directories in the
# local work area. Any copy left by an earlier run is removed first so that
# `cp -r` recreates the directory instead of nesting the new copy inside the
# old one. The script stops if a copy fails, because the old copy is already
# gone at that point and later steps would run against missing data
# (presumably run_blast_cluster.sh reads these directories -- verify).
work_data_dir=/home/mixagol/work/data
db_source_dir="${BASE_DATA_PATH:?}/1_databases/${BASE_DATE:?}"

for staged in genes_good_faa wu_blast_db; do
  rm -rf -- "${work_data_dir}/${staged}"
  cp -r -- "${db_source_dir}/${staged}" "${work_data_dir}/" || {
    echo "error: failed to copy ${db_source_dir}/${staged} to ${work_data_dir}/" >&2
    exit 1
  }
done

# Run the BLAST helper script. The path is relative, so this script has to be
# started from the directory that contains 2_raw_matrix_wu_blast/. A non-zero
# exit status is reported on stderr but does not stop the script, so whatever
# output exists is still collected below.
2_raw_matrix_wu_blast/run_blast_cluster.sh \
  || echo "warning: run_blast_cluster.sh exited with status $?" >&2

mkdir -p "${CUR_DATA_DIR:?}/output/"
# Crontab entry kept for reference: once a day at 08:00 it moves result files
# last modified more than 600 minutes ago.
# crontab
# 0 8 * * * find ~/work/data/results_wu_blast/output/ -type f -mmin +600 | xargs -I {} mv {} ${CUR_DATA_DIR}/output/

# Move every remaining result file into the data directory. RESULTS_DIR is the
# directory that used to be spelled ~/work/data/results_wu_blast here.
# `-exec mv -t DIR -- {} +` (GNU mv) hands the names to mv directly, so names
# containing quotes, backslashes or blanks are safe, and files are moved in
# batches instead of one mv process per file.
find "${RESULTS_DIR%/}/output/" -type f \
  -exec mv -t "${CUR_DATA_DIR}/output/" -- {} +

# Decompress all collected output files in name order, pipe the concatenated
# stream through convert_output.py and store the result gzip-compressed.
#
# The file list comes from a NUL-delimited find | sort -z | xargs -0 pipeline
# rather than `ls dir/*`: that handles unusual file names, avoids the
# "Argument list too long" failure of a huge glob, and with -r (GNU xargs)
# does not start zcat at all when there are no files. Hidden files are
# excluded to match what the `*` glob used to select.
#
# pipefail is enabled only inside the subshell, so a failure in any stage is
# detected here without changing how the rest of the script treats pipelines.
convert_status=0
(
  set -o pipefail
  find "${CUR_DATA_DIR:?}/output/" -mindepth 1 -maxdepth 1 -type f ! -name '.*' -print0 \
    | sort -z \
    | xargs -0 -r zcat \
    | 2_raw_matrix_wu_blast/convert_output.py \
    | gzip > "${CUR_DATA_DIR}/results.txt.gz"
) || convert_status=$?

# Remove the staged copies whether or not the conversion succeeded.
rm -rf /home/mixagol/work/data/genes_good_faa
rm -rf /home/mixagol/work/data/wu_blast_db/

# Stop here on failure: results.txt.gz may be incomplete and must not be
# filtered and sorted as if it were valid.
if [[ "${convert_status}" -ne 0 ]]; then
  echo "error: converting BLAST output failed (status ${convert_status})" >&2
  exit "${convert_status}"
fi

# Keep only the rows whose third tab-separated field is below 0.05, sort them
# on the first field and write the result gzip-compressed.
# sort uses a 2G memory buffer (-S2G) and puts its temporary files in
# CUR_DATA_DIR (-T).
#
# pipefail (scoped to the subshell) makes a failure of zcat, awk or sort
# visible; without it only the final gzip's status counts and a truncated
# result file would be reported as success.
(
  set -o pipefail
  zcat "${CUR_DATA_DIR:?}/results.txt.gz" \
    | awk -F'\t' '$3<0.05' \
    | sort -t$'\t' -S2G -T "${CUR_DATA_DIR}" -k1,1 \
    | gzip > "${CUR_DATA_DIR}/results_0.05_sorted.txt.gz"
) || {
  echo "error: filtering/sorting ${CUR_DATA_DIR}/results.txt.gz failed" >&2
  exit 1
}
